home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/gawk -f
- #!/usr/bin/awk -f
- # @(#) postings.gawk 1.0 94/03/09
- # 93/01/24 john h. dubois iii (john@armory.com)
- # 93/07/02 Added help, fixed Total.
- # 94/03/09 Use gawk so - options can be given
- BEGIN {
- Usage = "Usage: postings [-h] article-file ..."
- if (ARGC < 2) {
- print Usage
- print "Use -h for help."
- exit(1)
- }
- if (ARGV[1] ~ "^[-+]h$") {
- print \
- "postings: report number of postings to groups.\n"\
- Usage "\n"\
- "article-file is a file that postings are saved to, in mailbox format."
- exit(0)
- }
- GoodFile = Total = 0
- for (FileNum = 1; FileNum < ARGC; FileNum++) {
- InFile = ARGV[FileNum]
- while ((ret = (getline < InFile)) == 1) {
- if ($1 == "\001\001\001\001") {
- InHeader = 1
- }
- else if ($0 == "")
- InHeader = 0
- else if (InHeader && $1 == "Newsgroups:") {
- Total++
- split($2,Groups,",")
- for (i = 1; i in Groups; i++)
- GroupCt[Groups[i]]++
- }
- }
- close(InFile)
- if (ret)
- printf "Error reading file %s.\n",InFile
- else
- GoodFile = 1
- }
- if (!GoodFile)
- exit(1)
- NGroup = qsortByArbIndex(GroupCt,k)
- for (i = 1; i <= NGroup; i++) {
- group = k[i]
- printf "%3d %s\n",GroupCt[group],group
- }
- printf "%3d Total\n",Total
- }
-
- ### Begin qsort routines
-
- # Arr[] is an array of values with arbitrary indices.
- # k[] is returned with numeric indices 1..n.
- # The values in k[] are the indices of Arr[],
- # ordered so that if Arr[] is stepped through
- # in the order Arr[k[1]] .. Arr[k[n]], it will be stepped
- # through in order of the values of its elements.
- # The return value is the number of elements in the arrays (n).
- function qsortArbIndByValue(Arr,k, ArrInd,ElNum) {
- ElNum = 0
- for (ArrInd in Arr)
- k[++ElNum] = ArrInd
- qsortSegment(Arr,k,1,ElNum)
- return ElNum
- }
-
- # Sort a segment of an array.
- # Arr[] contains data with arbitrary indices.
- # k[] has indices 1..nelem, with the indices of arr[] as values.
- # This function sorts the elements of arr that are pointed to by
- # k[start..end], swapping the values of elements of k[] so that
- # when this function returns arr[k[start..end]] will be in order.
- function qsortSegment(Arr,k,start,end, left,right,sepval,tmp,tmpe,tmps) {
- # handle two-element case explicitly for a tiny speedup
- if ((end - start) == 1) {
- if (Arr[tmps = k[start]] > Arr[tmpe = k[end]]) {
- k[start] = tmpe
- k[end] = tmps
- }
- return
- }
- # Make sure comparisons act on these as numbers
- left = start+0
- right = end+0
- sepval = Arr[k[int((left + right) / 2)]]
- # Make every element <= sepval be to the left of every element > sepval
- while (left < right) {
- while (Arr[k[left]] < sepval)
- left++
- while (Arr[k[right]] > sepval)
- right--
- if (left < right) {
- tmp = k[left]
- k[left++] = k[right]
- k[right--] = tmp
- }
- }
- if (left == right)
- if (Arr[k[left]] < sepval)
- left++
- else
- right--
- if (start < right)
- qsortSegment(Arr,k,start,right)
- if (left < end)
- qsortSegment(Arr,k,left,end)
- }
-
- # Arr[] is an array of values with arbitrary indices.
- # k[] is returned with numeric indices 1..n.
- # The values in k are the indices of Arr[],
- # ordered so that if Arr[] is stepped through
- # in the order Arr[k[1]] .. Arr[k[n]], it will be stepped
- # through in order of the values of its indices.
- # The return value is the number of elements in the arrays (n).
- # If the indexes are numeric, Numeric should be true, so that they can be
- # compared as such rather than as strings. Numeric indexes do not have to be
- # contiguous.
- function qsortByArbIndex(Arr,k,Numeric, ArrInd,ElNum) {
- ElNum = 0
- if (Numeric)
- # Indexes do not preserve numeric type, so must be forced
- for (ArrInd in Arr)
- k[++ElNum] = ArrInd+0
- else
- for (ArrInd in Arr)
- k[++ElNum] = ArrInd
- qsortNumIndByValue(k,1,ElNum)
- return ElNum
- }
-
- # Arr is an array of elements with contiguous numeric indexes to be sorted
- # by value.
- # start and end are the starting and ending indexes of the range to be sorted.
- function qsortNumIndByValue(Arr,start,end, left,right,sepval,tmp,tmpe,tmps) {
- # handle two-element case explicitly for a tiny speedup
- if ((start - end) == 1) {
- if ((tmps = Arr[start]) > (tmpe = Arr[end])) {
- Arr[start] = tmpe
- Arr[end] = tmps
- }
- return
- }
- left = start+0
- right = end+0
- sepval = Arr[int((left + right) / 2)]
- while (left < right) {
- while (Arr[left] < sepval)
- left++
- while (Arr[right] > sepval)
- right--
- if (left <= right) {
- tmp = Arr[left]
- Arr[left++] = Arr[right]
- Arr[right--] = tmp
- }
- }
- if (start < right)
- qsortNumIndByValue(Arr,start,right)
- if (left < end)
- qsortNumIndByValue(Arr,left,end)
- }
-
- ### End qsort routines
-